This document downloads data on:
and merges it with publicly available data on the Israeli population from the World Bank and the Penn World Tables.
The results are plots which show:
Loading needed packages and setting the plot theme:
pacman::p_load(tidyverse, scales, WDI, pwt9, readxl, plotly, ggpmisc, patchwork)
ggplottheme <- ggpubr::theme_classic2() +
theme(
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
plot.title = element_text(hjust = 1),
plot.subtitle = element_text(hjust = 1),
plot.caption = element_text(hjust = 1),
axis.ticks = element_blank(),
strip.background = element_blank(),
panel.spacing = unit(2, "lines")
)
theme_set(ggplottheme)
Download the data from the CBS:
download.file(url = "https://www.cbs.gov.il/he/mediarelease/doclib/2019/347/27_19_347t1.xls",
destfile = "cbs_accidents.xls",
mode = "wb")
download.file(url = "https://www.cbs.gov.il/he/publications/doclib/2019/1772/t01.xls",
destfile = "cbs_km.xls",
mode = "wb")
download.file(url = "https://www.cbs.gov.il/he/publications/doclib/2019/1762/t01.xls",
destfile = "cbs_vehicles.xls",
mode = "wb")
Load the data to R, with some wrangling to transform from human-readable to machine-readable:
cbs_accidents <-read_excel(path = "cbs_accidents.xls",
col_names = c("year","fatalities","drop")
)%>%
select(-drop) %>%
mutate_all(as.numeric) %>%
filter(!is.na(fatalities), !is.na(year))
## Warning: Problem with `mutate()` input `year`.
## x NAs introduced by coercion
## i Input `year` is `.Primitive("as.double")(year)`.
## Warning in mask$eval_all_mutate(dots[[i]]): NAs introduced by coercion
## Warning: Problem with `mutate()` input `fatalities`.
## x NAs introduced by coercion
## i Input `fatalities` is `.Primitive("as.double")(fatalities)`.
## Warning in mask$eval_all_mutate(dots[[i]]): NAs introduced by coercion
cbs_km <- tibble(year = c(1975,1995,2000,2010:2018),
km_total = c(9232, 30633, 36482, 49870, 50576, 50107, 51207, 52400, 54820, 57220, 59602, 61196),
km_avg_percar = c(NA, 21.6, 20.5, 19.9, 19.1, 18.2, 18.1, 17.9, 17.9, 17.9, 17.7, 17.5))
cbs_vehicles <- read_excel(path = "cbs_vehicles.xls",
skip = 7) %>%
rename(year = ...12, vehicles = total) %>%
select(year, vehicles) %>%
filter(!is.na(year)) %>%
mutate(year2 = if_else(str_length(year) == 4, as.double(year), NA_real_),
year2 = if_else(is.na(year2), lag(year2) -1, year2)) %>%
select(-year, year = year2, vehicles) %>%
filter(!is.na(vehicles), vehicles != 100, !is.na(year)) %>%
mutate(vehicles = as.integer(vehicles))
## New names:
## * `` -> ...1
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * `4 tons` -> `4 tons...7`
## * ...
## Warning: Problem with `mutate()` input `year2`.
## x NAs introduced by coercion
## i Input `year2` is `if_else(str_length(year) == 4, as.double(year), NA_real_)`.
## Warning in if_else(str_length(year) == 4, as.double(year), NA_real_): NAs
## introduced by coercion
Manually update fatalities with data for 2019 (https://www.cbs.gov.il/he/mediarelease/DocLib/2020/151/27_20_151b.pdf):
cbs_accidents <- cbs_accidents %>%
add_row(year = 2019, fatalities = 355)
Estimate KM traveled for 2019 (updated data not available as of today):
km_fit <- lm(data = cbs_km %>% filter(year >= 2010),
formula = km_total ~ year)
modelsummary::modelsummary(km_fit)
| Model 1 | |
|---|---|
| (Intercept) | -2974307.344 |
| (350604.259) | |
| year | 1503.683 |
| (174.083) | |
| Num.Obs. | 9 |
| R2 | 0.914 |
| R2 Adj. | 0.902 |
| AIC | 159.0 |
| BIC | 159.6 |
| Log.Lik. | -76.500 |
cbs_km <- cbs_km %>%
add_row(year = 2019)
cbs_km$km_total_pred <- predict(km_fit, cbs_km)
cbs_km <- cbs_km %>%
mutate(km_total = if_else(year == 2019, round(km_total_pred), km_total ))
Manually update vehicles data for 2019 (https://www.cbs.gov.il/he/mediarelease/Pages/2020/כלי-רכב-מנועיים-בישראל-בשנת-2019.aspx):
cbs_vehicles <- cbs_vehicles %>%
add_row(year = 2019, vehicles = 3600600)
Load world bank data and PWT data on population in Israel:
israel_pop1 <- WDI(country = "IL", indicator = c(pop = "SP.POP.TOTL"))
data("pwt9.1")
israel_pop2 <- pwt9.1 %>%
filter(country == "Israel") %>%
select(country, year, pop) %>%
mutate(pop = pop * 10^6)
israel_pop <- full_join(israel_pop1, israel_pop2, by="year") %>%
mutate(pop = if_else(!is.na(pop.x), pop.x, pop.y))
Merge to create rates then reshape to easily plot all variations:
merged <- left_join(cbs_accidents, israel_pop, by="year") %>%
left_join(cbs_km, by="year") %>%
left_join(cbs_vehicles, by = "year") %>%
mutate(rate = (fatalities/pop) * 10^5,
rate_1bkm = (fatalities/km_total) * 10^3,
rate_100kcars = (fatalities/vehicles) * 10^5) %>%
pivot_longer(cols = c("fatalities", "rate", "rate_1bkm", "rate_100kcars"),
names_to = "type", values_to = "value") %>%
mutate(type = factor(type,
levels = c("fatalities", "rate", "rate_1bkm", "rate_100kcars"),
labels = c("הרוגים",
"שיעור הרוגים ל-100 אלף איש",
"הרוגים למיליארד קילומטר נסועה",
"הרוגים ל-100 אלף כלי רכב"))
) %>%
group_by(type) %>%
mutate(max = max(value, na.rm=TRUE), min = min(value, na.rm=TRUE))
Plot absolute and rate of fatalities:
plot1 <- merged %>%
filter(type %in% c("הרוגים",
"שיעור הרוגים ל-100 אלף איש")) %>%
ggplot(aes(x=year, y=value, color=type)) +
geom_line(size = 1) +
geom_hline(aes(yintercept = max), color="grey50", linetype="dashed") +
geom_hline(aes(yintercept = min), color="grey50", linetype="dashed") +
scale_y_continuous(breaks = pretty_breaks(n = 10)) +
scale_x_continuous(breaks = pretty_breaks(n = 10)) +
scale_color_manual(values = c("dodgerblue1","firebrick1"),
guide = FALSE) +
facet_wrap(~type, scales = "free") +
labs(x = "שנה",
y = "",
title = "תאונות דרכים בישראל: 2019-1949",
subtitle = "קווים אופקיים מציגים את המקסימום והמינימום",
caption = "מקור: נתוני הלשכה המרכזית לסטטיסטיקה")
plot1
ggsave(filename = "accidents_plot.png", width = 7)
Plot.ly for interactive chart:
plotly<-ggplotly(plot1, tooltip = c("x","y"))
hide_legend(plotly)
Plotting per KM traveled:
plot2 <- merged %>%
filter(type == "הרוגים למיליארד קילומטר נסועה",
!is.na(value)) %>%
ggplot(aes(x=factor(year), y=value)) +
geom_hline(aes(yintercept = max), color="grey50", linetype="dashed") +
geom_hline(aes(yintercept = min), color="grey50", linetype="dashed") +
geom_col(color = "dodgerblue1", fill="dodgerblue1", width = 0.8) +
geom_label(aes(label = round(value, digits = 2)), color="dodgerblue1") +
scale_y_continuous(breaks = pretty_breaks(n = 10),
expand = expansion(mult = c(0, 0.1))) +
labs(x = "שנה",
y = "",
title = "הרוגים למיליארד קילומטר נסועה בישראל: שנים נבחרות",
subtitle = paste0("בשנת 2019 הנסועה נאמדה על בסיס הנסועה בין 2010 ל-2018",
"\n",
"קווים אופקיים מציגים את המקסימום והמינימום"),
caption = "מקור: נתוני הלשכה המרכזית לסטטיסטיקה")
plot2
ggsave(filename = "deaths_per1Bkm.png", width = 7)
## Saving 7 x 5 in image
Plotting per cars:
plot3 <- merged %>%
filter(type == "הרוגים ל-100 אלף כלי רכב",
!is.na(value)) %>%
ggplot(aes(x=year, y=value)) +
geom_hline(aes(yintercept = max), color="grey50", linetype="dashed") +
geom_hline(aes(yintercept = min), color="grey50", linetype="dashed") +
geom_line(color = "dodgerblue1", size = 1) +
scale_y_continuous(breaks = pretty_breaks(n = 10)) +
scale_x_continuous(breaks = pretty_breaks(n = 10)) +
labs(x = "שנה",
y = "",
title = "הרוגים ל-100 אלף כלי רכב בישראל: 2019-1965",
subtitle = "קווים אופקיים מציגים את המקסימום והמינימום",
caption = "מקור: נתוני הלשכה המרכזית לסטטיסטיקה")
plot3
ggsave(filename = "deaths_per1Kvehicles.png", width = 7)
## Saving 7 x 5 in image
Plotting per cars (focus on data from 1990):
plot4 <- merged %>%
filter(type == "הרוגים ל-100 אלף כלי רכב",
!is.na(value),
year >= 1990) %>%
ggplot(aes(x=year, y=value)) +
geom_line(color = "dodgerblue1", size = 1) +
scale_y_continuous(breaks = pretty_breaks(n = 10)) +
scale_x_continuous(breaks = pretty_breaks(n = 10)) +
labs(x = "שנה",
y = "",
title = "הרוגים ל-100 אלף כלי רכב בישראל: 2019-1990",
subtitle = "",
caption = "מקור: נתוני הלשכה המרכזית לסטטיסטיקה")
plot4
ggsave(filename = "deaths_per1Kvehicles_from1990.png", width = 7)
## Saving 7 x 5 in image